State Results

priors_versions <- c("v1", "v2", "v3", "v4")


versions <- tibble(
  version = c("v1", "v2", "v3", "v4"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "$\\beta$ Centered at Emp. Value",
    "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
    "$P(S_1|untested)$ Centered at Emp. Value"),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
        "$\\beta$ Centered at Emp. Value",
        "$P(S_1|untested)$ Centered at Emp. Value",
        "$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values"
        
    ) )
)


state_corrected_path <- here("analysis/results/adj_biweekly_state/")


################################
# ESTIMATED
################################
dates <- readRDS(here("data/date_to_biweek.RDS")) %>%
  group_by(biweek) 



covidestim <- readRDS(here('data/covidestim/covidestim_biweekly_all_states.RDS')) %>% 
  select(-date)


corrected <- map_df(priors_versions, ~readRDS(
        paste0(state_corrected_path, "adj_",
               .x, 
               ".RDS")) %>% 
          mutate(version = .x)) %>%
  left_join(dates, relationship = "many-to-many") %>% 
  rename(state=fips) %>%
  left_join(versions) %>%
  mutate(state=toupper(state)) %>%
  left_join(covidestim, relationship= "many-to-many")
## Joining with `by = join_by(biweek)`
## Joining with `by = join_by(version)`
## Joining with `by = join_by(biweek, state)`

Summarizing Concordance with Covidestim

# corrected %>%
#   mutate(in_interval = case_when(
#     exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
#     exp_cases_lb  > infections  ~ "Below Interval",
#     exp_cases_ub < infections ~ "Above Interval"
#   )) %>%
#   filter(!is.na(in_interval)) %>%
#   group_by(in_interval, state, vlabel) %>%
#   summarize(n_per_county=n()) %>%
#   group_by(vlabel, in_interval) %>%
#   summarize(n_per_version = sum(n_per_county)) %>%
#   group_by(vlabel) %>%
#   mutate(total = sum(n_per_version)) %>%
#   ungroup() %>%
#   mutate(prop=n_per_version/total)



by_version <- corrected %>%
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  )) %>%
  filter(!is.na(in_interval)) %>%
  group_by(in_interval, vlabel) %>%
  summarize(n=n()) %>%
  group_by(vlabel) %>%
  mutate(total = sum(n)) %>%
  mutate(prop=n/total) 
## `summarise()` has grouped output by 'in_interval'. You can override using the
## `.groups` argument.
labels <- by_version %>%
  arrange(prop) %>%
  pull(vlabel) %>%
  as.character() %>%
  unique()


by_version %>%
   ggplot(aes(x= fct_reorder(vlabel,prop,max), 
             fill = in_interval, y =prop)) + 
  geom_bar(stat="identity",position="dodge") +
  theme_c() +
  coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79",
                      "Below Interval"="#7997D2")) +
  labs(x="", 
       y= "Proportion",
       fill = "Covidestim Median:",
       title = "Proportion Where Covidestim Median\nWas Within, Above, or Below the Median",
       subtitle = "State Level") +
  theme_c() +
  theme(legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt'),
          axis.text.y = element_text(size = 17, hjust=1)) +
  scale_x_discrete(labels = (TeX(labels)))

ggsave(here('presentation/figure/covidestim_concordance_state.jpeg'), height=8, width=14)

Testing Rate over Time

subset <- corrected %>%
  filter(vlabel == "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")


subset %>%
  ggplot(aes(x=date, y =total/population))+
  geom_line() +
  facet_wrap(~state) +
  theme_c() +
  labs(title ="Biweekly Testing Rate",
       y = "Total Number of Tests / Population Size")

subset %>%
  ggplot(aes(x=date, y =total/population))+
  geom_line() +
  facet_wrap(~state) +
  theme_c() +
  labs(title ="Biweekly Testing Rate",
       y = "Total Number of Tests / Population Size")

###########################################################################
# plot relationship between ratio of estimated cases to observed 
# against testing rate 
###########################################################################
corrected %>%
  ggplot(aes(x=total/population, y = exp_cases_median/positive)) +
  geom_point(alpha=.008, size=.8) +
  facet_wrap(~vlabel,
               labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  theme_c(axis.text.x = element_text(size =9),
          axis.title = element_text(size=16) ) +
  theme(strip.text.x = element_text(size=11, color="white")) +
  scale_y_continuous(n.breaks=10) +
  geom_hline(yintercept=1, linetype=2,color="darkred", alpha=.8, linewidth=.5) +
  labs(y = "Ratio of Estimated Infections to Observed",
       x = "Testing Rate",
       subtitle = "All Time Intervals and All States",
       title = "Relationship Between Testing Rate and\nRatio of Estimated Infections to Observed")+
  scale_x_continuous(limits=c(0,.25), n.breaks=4)
## Warning: Removed 228 rows containing missing values (`geom_point()`).

ggsave(here('thesis/figure/testing_rate_ratio.jpeg'), width=10,height=6)
## Warning: Removed 228 rows containing missing values (`geom_point()`).
# thinking about student populations

# https://nces.ed.gov/ipeds/TrendGenerator/app/trend-table/2/2?trending=column&rid=6
# Source: U.S. Department of Education, National Center for Education Statistics, Integrated Postsecondary Education Data System (IPEDS), 12-month Enrollment component final data (2001-02 - 2019-20) and provisional data (2020-21).

students <- read_csv(here('data/demographic/student_population.csv'), skip=2, n_max=4)
## Rows: 4 Columns: 53
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Year
## num (52): Total, Alabama, Alaska, Arizona, Arkansas, California, Colorado, C...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
codes <- read_csv(here('data/demographic/statecodes.csv'))
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): state, abbrev, code
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
students <- students %>%
  filter(Year =="2020-21") %>% 
  select(-Total) %>%
  pivot_longer(-c(Year),
               names_to="state",
               values_to="student_population") %>%
  left_join(codes, by= c('state'='state')) %>%
  select(state=code, student_population, state_name = state)

corrected %>%
  select(state,population,positive,total) %>%
  distinct() %>%
  left_join(students) %>%
  ggplot(aes(x=student_population/population, y = total/population)) +
  geom_point()
## Joining with `by = join_by(state)`

corrected %>%
 # filter(state!="CA") %>%
  select(state,population,positive,total) %>%
  distinct()%>%
  left_join(students) %>%
  ggplot(aes(x=fct_reorder(state_name, total/population),
             y=total/population,
             fill =student_population/population)) +
  geom_boxplot() +
  scale_fill_binned(type="viridis",
                    n.breaks=8,
                    direction=-1,
                    option="magma") +
  theme_c()+
  theme(legend.text=element_text(size=8),
        legend.title = element_text(size=19)) +
  coord_flip() +
  labs(x="",
       fill = "Ratio of Student Population\nto Census Population",
       y= "Total Number of Tests Over Population Size",
       subtitle =
         "Color by Ratio of Student Population to Population\n Reported in the Census",
       title = "Testing Rate by State Across All 2-Week Intervals") +
  scale_y_continuous(n.breaks =6)
## Joining with `by = join_by(state)`

ggsave(here::here(paste0('thesis/figure/college_populations.jpeg')), 
       height=11, width = 11, dpi=400)


corrected %>%
  mutate(testrate=total/population) %>%
  filter(testrate>.25) %>% 
 # filter( abs(testrate - max(testrate)) <=.05) %>%
  select(date,testrate, state) %>%
  distinct() 

Ratio of Unobserved to Observed

Considering version using \(P(S_1|\text{untested})\) centered at empirical value.

subset %>% 
  ggplot(aes(x=date, ymin = exp_cases_lb/positive,
             ymax = exp_cases_ub/positive)) +
  geom_ribbon() +
  theme_c()+ 
  facet_wrap(~state)

Intervals by State and Biweek

pal <- c("red", "#10BAC5", "#1B10C5", "#EFB719", "#900C3F")

names(pal) <- c("Covidestim",
                '$P(S_1|untested)$ Centered at Emp. Value',
                '$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
               "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
               "$\\beta$ Centered at Emp. Value"
               )



states_all_versions <- corrected %>%
  select(state,version) %>%
  group_by(state) %>%
  summarize(n_versions = n_distinct(version)) %>% 
  filter(n_versions ==4) 


cat(paste0("### Number of states with 4 versions is: ", states_all_versions$state %>% unique() %>% length()))
## ### Number of states with 4 versions is: 27
cat(paste0("### Number of states with at least 1 versions is: ", corrected$state %>% unique() %>% length()))
## ### Number of states with at least 1 versions is: 51
end <- length(unique(states_all_versions$state))
n <- floor(end/2)

# first group
corrected %>%
  filter(state %in% states_all_versions$state[1:n]) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
  filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = vlabel),
             alpha = .7,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .7) +
   # geom_linerange(aes(xmin = xmin,
   #                    xmax=xmax,
   #                    y = positive,
   #                    color = 'obs')) +
  facet_grid(state~vlabel, scales = "free_y",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x = element_text(angle = 60, size = 16),
          plot.title=element_text(face = "bold",
                                  hjust = .5, 
                                  size = 20,
                                  margin=margin(5,5,15,5,'pt')),
          strip.background = element_rect(fill = "#393939"),
          strip.text = element_text(color =  "white", size = 16),
          strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
                                      size = 14,
                                      face="bold"),
          strip.text.x =  element_text(margin = margin(5,3,5,3,'pt'),
                                       face = "bold",
                                       size = 10),
          plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
                                       face = "italic"),
          legend.position = "top",
          legend.text =element_text(size = 14)) +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version Across the United States")) +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here::here(paste0('thesis/figure/state_comp_covidestim1.pdf')), 
       height=24, width = 16, dpi=400)



corrected %>%
  filter(state %in% states_all_versions$state[n+1:end]) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
  filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = vlabel),
             alpha = .6,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .8) +
   # geom_linerange(aes(xmin = xmin,
   #                    xmax=xmax,
   #                    y = positive,
   #                    color = 'obs')) +
  facet_grid(state~vlabel, scales = "free_y",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x = element_text(angle = 60, size = 16),
          plot.title=element_text(face = "bold",
                                  hjust = .5, 
                                  size = 20,
                                  margin=margin(5,5,15,5,'pt')),
          strip.background = element_rect(fill = "#393939"),
          strip.text = element_text(color =  "white", size = 16),
          strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
                                      size = 14,
                                      face="bold"),
          strip.text.x =  element_text(margin = margin(5,3,5,3,'pt'),
                                       face = "bold",
                                       size = 10),
          plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
                                       face = "italic"),
          legend.position = "top",
          legend.text =element_text(size = 14)) +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version Across the United States")) +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here::here('thesis/figure/state_comp_covidestim2.pdf'), 
       height=24, width = 16, dpi=400)
#######################
# for presentation
#######################
corrected %>%
  filter(state %in% c("MA", "MI")) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
  filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = vlabel),
             alpha = .7,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .7) +
   # geom_linerange(aes(xmin = xmin,
   #                    xmax=xmax,
   #                    y = positive,
   #                    color = 'obs')) +
  facet_grid(state~vlabel, scales = "free_y",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x = element_text(angle = 40, size = 10),
          axis.text.y = element_text( size = 7),
          plot.title=element_text(face = "bold",
                                  hjust = .5, 
                                  size = 20,
                                  margin=margin(5,5,15,5,'pt')),
          strip.background = element_rect(fill = "#393939"),
          strip.text = element_text(color =  "white", size = 16),
          strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
                                      size = 14,
                                      face="bold"),
          strip.text.x =  element_text(margin = margin(5,3,5,3,'pt'),
                                       face = "bold",
                                       size = 10),
          plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
                                       face = "italic"),
          legend.position = "top",
          legend.text =element_text(size = 14)) +
  scale_fill_manual(values =pal,
                    breaks='Covidestim',
                    labels = TeX(names(pal)),
                    name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version (State Level)")) 

ggsave(here('presentation/figure/state_level_mi_ma.jpeg'), width =13, height=9)
ratios <- corrected %>%
  filter(vlabel == "$P(S_1|untested)$ Centered at Emp. Value") %>% 
 # filter(state %in% sample(unique(corrected$state),5)) %>%
  mutate(ratio_undetected_lb = exp_cases_lb/ positive,
         ratio_undected = exp_cases_median/positive,
         ratio_undetected_ub = exp_cases_ub/ positive) %>%
  group_by(state) %>%
  mutate(m_ratio = median(ratio_undected)) %>%
  ungroup() %>%
  mutate(state=fct_reorder(state,m_ratio)) %>%
  arrange(state)

states_ordered <-ratios$state %>% unique()

ratios %>%
 # filter(state %in% states_ordered[1:n])  %>%
  ggplot(aes(x=date,ymin=ratio_undetected_lb, ymax=ratio_undetected_ub)) +
  geom_ribbon(
              alpha = .8) +
   # geom_linerange(aes(xmin = xmin,
   #                    xmax=xmax,
   #                    y = positive,
   #                    color = 'obs')) +
  facet_wrap(~state,
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed)),
             ncol = 5) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x = element_text(angle = 60, size = 16),
          plot.title=element_text(face = "bold",
                                  hjust = .5, 
                                  size = 20,
                                  margin=margin(5,5,15,5,'pt')),
          strip.background = element_rect(fill = "#393939"),
          strip.text = element_text(color =  "white", size = 16),
          strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
                                      size = 14,
                                      face="bold"),
          strip.text.x =  element_text(margin = margin(5,3,5,3,'pt'),
                                       face = "bold",
                                       size = 10),
          plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
                                       face = "italic"),
          legend.position = "top",
          legend.text =element_text(size = 14)) +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='')  +
  labs(y = "Ratio of Estimated Infections to Observed Cases",
       title = paste0("Ratio of Estimated to Observed Cases")) +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))